from database.db_handler import MysqlHander
from common.my_http import MyHttp
import urllib.request
from bs4 import BeautifulSoup
from common.market_data import MarketData
import glob
import jieba
import re
import threading
import hashlib
import json
from common.entity_data import EntityData
from whoosh.index import create_in
from jieba.analyse import ChineseAnalyzer
from whoosh.fields import *

'''
Source:
'''

class WhooshIndexer:
    """Build a Whoosh full-text index from rows supplied by ``EntityData``.

    Construction loads the jieba user dictionary, defines a two-field schema
    (``title`` and ``content``, both stored and analysed with jieba's
    ``ChineseAnalyzer``), creates the index under ``INDEX_DIR`` and opens a
    writer on it.  ``process`` then pages through the entity rows and commits
    them in a single transaction.
    """

    # Directory that holds the Whoosh index files.
    INDEX_DIR = "indexdir"
    # Custom jieba dictionary loaded before the analyzer is built.
    USER_DICT_PATH = "./jieba_dict/user.dict"

    def __init__(self):
        """Create the index directory, the index itself and an open writer."""
        # Function-scope import so this block does not depend on the
        # file-level import list.
        import os

        self.entity = EntityData()
        jieba.load_userdict(self.USER_DICT_PATH)
        analyzer = ChineseAnalyzer()
        schema = Schema(
            title=TEXT(stored=True, analyzer=analyzer),
            content=TEXT(stored=True, analyzer=analyzer),
        )
        # create_in() expects the target directory to exist already; create
        # it on first run instead of failing.
        os.makedirs(self.INDEX_DIR, exist_ok=True)
        # NOTE(review): per the Whoosh docs create_in() starts a fresh index
        # in the directory -- confirm that a full rebuild is intended.
        ix = create_in(self.INDEX_DIR, schema)
        self.writer = ix.writer()

    def process(self) -> None:
        """Index every entity row and commit the result.

        Rows are fetched page by page via
        ``self.entity.query_entity_for_index(last_id)``, starting from ``0``
        and passing the first column of the most recently indexed row on each
        subsequent call.  Paging stops at the first empty (or ``None``)
        result.  Each row is read positionally: column 0 is the id, column 1
        the title, column 2 the content.

        Raises:
            Exception: anything raised while querying or indexing is
                re-raised after the writer has been cancelled, so that no
                half-written segment or write lock is left behind.
        """
        last_id = 0
        try:
            while True:
                # presumably returns rows whose id is greater than last_id;
                # verify against EntityData, otherwise this loop cannot end.
                rows = self.entity.query_entity_for_index(last_id)
                if not rows:
                    break
                for row in rows:
                    self.writer.add_document(title=row[1], content=row[2])
                    last_id = row[0]
        except Exception:
            # Discard pending changes and release the writer before
            # propagating the error.
            self.writer.cancel()
            raise
        self.writer.commit()

if __name__ == '__main__':
    # Script entry point: build the indexer and run one full indexing pass.
    WhooshIndexer().process()
